from sklearn.model_selection import train_test_split

import pandas as pd


def get_data(path: str, label_name: str):
    """
    Load a tabular dataset from disk and split it into train and test sets.

    The file is read with pandas, the bookkeeping columns ``Unnamed: 0`` and
    ``Date`` are removed when present, and the ``label_name`` column is
    separated from the remaining feature columns. Missing feature values are
    replaced with 0; label values are left untouched.

    Features and labels are offset by one row: the first feature row and the
    last label value are discarded, so feature row ``i + 1`` ends up at the
    same position as label row ``i``.
    NOTE(review): this offset is kept exactly as originally written; confirm
    that pairing each feature row with the previous row's label is intended.

    :param path:  path to a csv or excel (xlsx / xls) file; the extension is
                  matched case-insensitively
    :param label_name: name of the column to use as the label
    :return: ``((x_train, y_train), (x_test, y_test))`` with 30% of the rows
             held out for testing, or ``None`` when the file extension is not
             supported
    :raises KeyError: if ``label_name`` is not a column of the loaded data
    """
    # Compare against a lower-cased copy so "DATA.CSV" is accepted too, while
    # the original path is still the one handed to pandas.
    lowered_path = path.lower()
    if lowered_path.endswith("csv"):
        data = pd.read_csv(path)
    elif lowered_path.endswith(("xlsx", "xls")):
        data = pd.read_excel(path)
    else:
        print("数据格式only支持 csv  excel")
        return None

    # These columns only exist in some exports; errors='ignore' keeps files
    # without them from failing with a KeyError.
    data = data.drop(['Unnamed: 0', 'Date'], axis=1, errors='ignore')

    label_column = '%s' % label_name
    label = data[label_column]
    data = data.drop([label_column], axis=1)

    # One-row offset between features and labels (see docstring).
    data = data.iloc[1:, :].fillna(0)
    label = label.iloc[0:-1]

    print("------->:", "train-shape=", data.shape, "  label", label.shape)
    x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.3)
    return (x_train, y_train), (x_test, y_test)




if __name__ == '__main__':
    # Raw string: the Windows path contains backslash sequences (\w, \c, \m,
    # \d) that are invalid escapes in a normal literal and trigger a warning.
    get_data(r"D:\workspace\codes\machine-learning\dataset\data\data.csv", "Volume")